Assume we have a continuous piecewise-linear function $f(x) = a_l x + b$ for $x < 0$ and $f(x) = a_r x + b$ for $x \ge 0$. Can we detect the "jump in slope" at $x=0$ from a noisy dataset? Here, noisy means that we add iid $\mathcal{N}(0, \sigma^2)$-distributed Gaussian noise to each $f(x_i)$.


In [78]:
from tools.plot import plot
# Ground-truth model parameters and sampling setup.
a_l, a_r = 1., -1.   # slope left / right of the kink
b = 4                # shared intercept at x = 0
sigma = 2.           # noise standard deviation
n_samples = 100

def twolin(x, xc, left_slope, right_slope, offset):
    """Continuous two-piece linear function hinged at xc.

    Evaluates left_slope*(x-xc)+offset for x < xc and
    right_slope*(x-xc)+offset for x >= xc (works elementwise on arrays).
    """
    left = (x < xc) * left_slope * (x - xc)
    right = (x >= xc) * right_slope * (x - xc)
    return left + right + offset

def f(x):
    """The noise-free ground-truth function with its kink at x = 0."""
    return twolin(x, 0, a_l, a_r, b)

# Draw sample locations uniformly and add iid Gaussian noise to f.
xs = np.random.uniform(low=-3, high=3, size=n_samples)
ys = f(xs) + np.random.normal(scale=sigma, size=n_samples)

plot(f, (-3, 3))
pl.plot(xs, ys, lw=0, marker='x')


Out[78]:
[<matplotlib.lines.Line2D at 0x7fbbf8fca950>]

In [79]:
def error(f):
    """Mean squared error of model *f* on the global dataset (xs, ys)."""
    residuals = f(xs) - ys
    return np.sum(residuals ** 2) / len(xs)

In [80]:
def get_split_fit(xc):
    """Least-squares fit of a continuous two-piece linear model hinged at xc.

    Fits y = a*(x - xc) + c for x < xc and y = b*(x - xc) + c for x >= xc
    to the global data (xs, ys), with the intercept c shared by both pieces
    (closed-form solution of the joint normal equations).

    Returns (a, b, c) on success, or False when either side of the split
    has fewer than 2 samples (the fit would be under-determined).
    """
    sel = xs < xc
    n_left = int(sel.sum())
    if n_left < 2 or n_left > len(sel) - 2:
        return False

    w = n_left / len(sel)  # fraction of samples left of the hinge
    m = np.mean

    # Per-side pieces of the normal equations (centered at xc).
    def num(x, y):
        return m(y) - m(x - xc) * m((x - xc) * y) / m((x - xc) ** 2)

    def den(x):
        return m(x - xc) ** 2 / m((x - xc) ** 2)

    def slope(x, y, c):
        # c is passed explicitly (the original closed over a later-assigned
        # global, which was fragile).
        return (m((x - xc) * y) - c * m(x - xc)) / m((x - xc) ** 2)

    # BUGFIX: negate the boolean mask with '~', not unary '-'; '-' on a
    # boolean array is deprecated/removed in modern NumPy.
    left_x, left_y = xs[sel], ys[sel]
    right_x, right_y = xs[~sel], ys[~sel]

    c = (w * num(left_x, left_y) + (1 - w) * num(right_x, right_y)) / \
        (1 - w * den(left_x) - (1 - w) * den(right_x))
    a = slope(left_x, left_y, c)
    b = slope(right_x, right_y, c)

    return a, b, c

In [81]:
# Scan candidate hinge positions and plot the fit error at each one.
for xc in np.linspace(-2, 2, 50):
    vals = get_split_fit(xc)
    if vals is False:
        # BUGFIX: get_split_fit returns False when one side of the split has
        # too few samples; unpacking *vals would raise TypeError. Skip it.
        continue
    pl.scatter([xc], error(lambda x: twolin(x, xc, *vals)))



In [83]:
# Fit with the hinge placed at the true kink location and overlay the
# ground truth, the noisy samples, and the recovered model.
xc = 0
fit = get_split_fit(xc)
plot(f, (-3, 3))
pl.plot(xs, ys, lw=0, marker='x')
plot(lambda x: twolin(x, xc, *fit), (-3, 3))
print(fit)


(0.93151053098532355, -0.66037084723455841, 3.921483985453853)

Now, can we do this in a Bayesian way?


In [2]:
import pymc as pm

In [ ]: